{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RRYSu48huSUW",
        "outputId": "a54b6db7-0d12-4f0b-a1d3-9e5166461773"
      },
      "outputs": [],
      "source": [
        "# Install with %pip so the packages land in the environment the kernel is running in.\n",
        "# langchain is pinned to 0.0.237, the version reported by the `pip show` cell below.\n",
        "# NOTE(review): openai is capped below 1.0 on the assumption that this langchain\n",
        "# release targets the pre-1.0 openai client -- confirm before loosening the pin.\n",
        "%pip install -q \"openai<1\" tiktoken langchain==0.0.237"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "id": "dNA4TsHpu6OM"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "from getpass import getpass\n",
        "\n",
        "# Never hardcode the key in the notebook. Reuse a key that is already present in\n",
        "# the environment; otherwise prompt for it without echoing it into the output.\n",
        "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
        "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OPENAI_API_KEY: \")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "J-KFB7J_u_3L",
        "outputId": "4f14e74c-7081-48b2-850a-3b173baa916d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Name: langchain\n",
            "Version: 0.0.237\n",
            "Summary: Building applications with LLMs through composability\n",
            "Home-page: https://www.github.com/hwchase17/langchain\n",
            "Author: \n",
            "Author-email: \n",
            "License: MIT\n",
            "Location: C:\\Users\\freestone\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python311\\site-packages\n",
            "Requires: aiohttp, dataclasses-json, langsmith, numexpr, numpy, openapi-schema-pydantic, pydantic, PyYAML, requests, SQLAlchemy, tenacity\n",
            "Required-by: \n"
          ]
        }
      ],
      "source": [
        "# Use %pip so the version shown is the one installed in the kernel's environment.\n",
        "%pip show langchain"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "HqwsGJDhvAQ5"
      },
      "source": [
        "## Classification / Tagging\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 44,
      "metadata": {
        "id": "IfCt8bhHNu9u"
      },
      "outputs": [],
      "source": [
        "from langchain.chat_models import ChatOpenAI\n",
        "from langchain.chains import LLMChain\n",
        "from langchain.prompts import ChatPromptTemplate\n",
        "from pydantic import BaseModel, Field\n",
        "from enum import Enum\n",
        "from langchain.chains.openai_functions import (\n",
        "    create_tagging_chain,\n",
        "    create_tagging_chain_pydantic,\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {},
      "outputs": [],
      "source": [
        "class PersonalDetails(BaseModel):\n",
        "    # Personal details collected from the user: three required string fields.\n",
        "    # The description strings are runtime text attached to each field and are\n",
        "    # kept verbatim.\n",
        "    name: str = Field(..., description=\"这是用户输入的名字\")\n",
        "    city: str = Field(..., description=\"这是用户输入的居住城市\")\n",
        "    email: str = Field(..., description=\"这是用户输入的邮箱地址\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "id": "--pSvL50li1z"
      },
      "outputs": [],
      "source": [
        "llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 22,
      "metadata": {},
      "outputs": [],
      "source": [
        "chain = create_tagging_chain_pydantic(PersonalDetails,llm)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 184,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='美丽', city='上海浦东', email='liteli1987@gmail.com')"
            ]
          },
          "execution_count": 184,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "test_str1 = \"你好，我是美丽，我住在上海浦东，我的邮箱是： liteli1987@gmail.com\"\n",
        "test_res1 = chain.run(test_str1)\n",
        "test_res1"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "第二个测试，我只告诉机器人我的邮箱。可以看到，机器人只记录了邮箱，name 和 city 都是空字符串。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 174,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='', city='', email='liteli1987@gmail.com')"
            ]
          },
          "execution_count": 174,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "test_str2 = \"我的邮箱是： liteli1987@gmail.com\"\n",
        "test_res2 = chain.run(test_str2)\n",
        "test_res2"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "我们可以再来第三个测试：告诉机器人我的名字，但不告诉它我自己的邮箱，而是故意告诉它我弟弟的邮箱。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 177,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='美丽', city='', email='')"
            ]
          },
          "execution_count": 177,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "test_str3 = \"我叫美丽，我弟弟的邮箱是：1106968391@qq.com\"\n",
        "test_res3 = chain.run(test_str3)\n",
        "test_res3"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "我们可以看到 email 的结果是 ''：如果用户的输入里没有与我们定义的 PersonalDetails 字段相匹配的信息，机器人并不会瞎编乱造。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 35,
      "metadata": {},
      "outputs": [],
      "source": [
        "user_007_personal_details = PersonalDetails(name=\"\",city=\"\",email=\"\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 36,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='', city='', email='')"
            ]
          },
          "execution_count": 36,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "user_007_personal_details"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "定义一个函数，用于检查数据是否填写完整。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 37,
      "metadata": {},
      "outputs": [],
      "source": [
        "def check_what_is_empty(user_personal_details):\n",
        "    \"\"\"Return the names of the fields that are still unfilled.\n",
        "\n",
        "    A field counts as unfilled when its value equals None, the empty string\n",
        "    or 0. One line is printed for every unfilled field, in field order.\n",
        "    \"\"\"\n",
        "    blank_markers = (None, \"\", 0)\n",
        "    missing_fields = []\n",
        "    for field_name, field_value in user_personal_details.dict().items():\n",
        "        if field_value not in blank_markers:\n",
        "            continue\n",
        "        print(f\"Field '{field_name}' 为空\")\n",
        "        missing_fields.append(f\"{field_name}\")\n",
        "    return missing_fields"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "我们来测试一下用户007是否填写完整了。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 38,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Field 'name' 为空\n",
            "Field 'city' 为空\n",
            "Field 'email' 为空\n"
          ]
        },
        {
          "data": {
            "text/plain": [
              "['name', 'city', 'email']"
            ]
          },
          "execution_count": 38,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "ask_for = check_what_is_empty(user_007_personal_details)\n",
        "ask_for"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "我们再来定义一个函数，用于获取用户的输入信息并且更新用户的信息。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 180,
      "metadata": {},
      "outputs": [],
      "source": [
        "def add_non_empty_details(current_details:PersonalDetails, new_details:PersonalDetails):\n",
        "    \"\"\"Overlay the filled-in fields of new_details onto current_details.\n",
        "\n",
        "    Fields of new_details whose value equals None, the empty string or 0 are\n",
        "    skipped, so they never overwrite what current_details already holds.\n",
        "    Returns the result of current_details.copy(update=...).\n",
        "    \"\"\"\n",
        "    filled_in = {}\n",
        "    for field_name, field_value in new_details.dict().items():\n",
        "        if field_value in (None, \"\", 0):\n",
        "            continue\n",
        "        filled_in[field_name] = field_value\n",
        "    return current_details.copy(update=filled_in)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 181,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='007', city='南京', email='XX@qq.com')"
            ]
          },
          "execution_count": 181,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "res = chain.run(\"我的名字007\") \n",
        "user_007_personal_details = add_non_empty_details(user_007_personal_details,res)\n",
        "user_007_personal_details"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 149,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='007', city='南京', email='XX@qq.com')"
            ]
          },
          "execution_count": 149,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "res = chain.run(\"我住在南京\") \n",
        "user_007_personal_details = add_non_empty_details(user_007_personal_details,res)\n",
        "user_007_personal_details"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 150,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='007', city='南京', email='XX@qq.com')"
            ]
          },
          "execution_count": 150,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "res = chain.run(\"我的邮箱是XX@qq.com\") \n",
        "user_007_personal_details = add_non_empty_details(user_007_personal_details,res)\n",
        "user_007_personal_details"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "测试一下哪一项没有填写"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 182,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "[]"
            ]
          },
          "execution_count": 182,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "ask_for = check_what_is_empty(user_007_personal_details)\n",
        "ask_for"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 152,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "谢谢您的回答！我没有问题了\n"
          ]
        }
      ],
      "source": [
        "if not ask_for:\n",
        "    print(\"谢谢您的回答！我没有问题了\")"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "使用自定义提示模板，实现机器人发起提问。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 61,
      "metadata": {},
      "outputs": [],
      "source": [
        "def ask_for_info(ask_for=None):\n",
        "    \"\"\"Have the model phrase one question about a field that is still missing.\n",
        "\n",
        "    Args:\n",
        "        ask_for: names of the fields still missing. When omitted, all three\n",
        "            fields (name, city, email) are used.\n",
        "\n",
        "    Returns:\n",
        "        The value returned by running the LLMChain on the prompt below.\n",
        "    \"\"\"\n",
        "    # Build the default per call instead of sharing one list object across calls.\n",
        "    if ask_for is None:\n",
        "        ask_for = [\"name\", \"city\", \"email\"]\n",
        "    # Prompt template; {ask_for} is filled in with the list of missing fields.\n",
        "    first_prompt = ChatPromptTemplate.from_template(\n",
        "        \"\"\"\n",
        "        假设你现在是一名前台，你现在需要对用户进行询问他个人的具体信息。\n",
        "        不要跟用户打招呼！你可以解释你需要什么信息。不要说“你好！”！\n",
        "        接下来你和用户之间的对话都是你来提问，凡是你说的都是问句。\n",
        "        你每次随机选择{ask_for}列表中的一个项目，向用户提问。\n",
        "        比如[\"name\",\"city\"]列表，你可以随机选择一个\"name\", 你的问题就是“请问你的名字是？”\n",
        "        \"\"\"\n",
        "    )\n",
        "    info_gathering_chain = LLMChain(llm=llm, prompt=first_prompt)\n",
        "    question = info_gathering_chain.run(ask_for=ask_for)\n",
        "    return question"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 183,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "'请问你的名字是？'"
            ]
          },
          "execution_count": 183,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "ask_for_info(ask_for=[\"name\",\"city\",\"email\"])"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "定义一个处理用户回答的函数：解析用户的输入、更新用户信息，并返回仍未填写的项目。"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 104,
      "metadata": {},
      "outputs": [],
      "source": [
        "def filter_response(text_input, user_details):\n",
        "    \"\"\"Parse one user message and fold what it contains into user_details.\n",
        "\n",
        "    Returns a tuple of (updated details, names of the fields still empty).\n",
        "    \"\"\"\n",
        "    # Build the tagging chain and run it on the raw user text.\n",
        "    tagging_chain = create_tagging_chain_pydantic(PersonalDetails, llm)\n",
        "    parsed_details = tagging_chain.run(text_input)\n",
        "    # Merge the parsed values in, then work out what is still missing.\n",
        "    merged_details = add_non_empty_details(user_details, parsed_details)\n",
        "    return merged_details, check_what_is_empty(merged_details)"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "测试一下效果，我们要获取的就是最新的用户信息，筛选出来哪些项目还没有填写。\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 155,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "请问你的名字是？\n"
          ]
        }
      ],
      "source": [
        "def decide_ask(ask_for=None):\n",
        "    \"\"\"Print the next question for the user, or a notice that nothing is missing.\n",
        "\n",
        "    Args:\n",
        "        ask_for: names of the fields still missing. When omitted, all three\n",
        "            fields (name, city, email) are treated as missing. An empty list\n",
        "            means every field is filled in.\n",
        "    \"\"\"\n",
        "    # Build the default per call instead of sharing one list object across calls.\n",
        "    if ask_for is None:\n",
        "        ask_for = [\"name\", \"city\", \"email\"]\n",
        "    if ask_for:\n",
        "        ai_res = ask_for_info(ask_for=ask_for)\n",
        "        print(ai_res)\n",
        "    else:\n",
        "        print(\"全部填写完整\")\n",
        "\n",
        "\n",
        "# Demo with every field missing. The global `ask_for` is not used here because it\n",
        "# still holds the result of the earlier user_007 check.\n",
        "decide_ask()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 167,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "PersonalDetails(name='', city='', email='')"
            ]
          },
          "execution_count": 167,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "user_999_personal_details = PersonalDetails(name=\"\",city=\"\",email=\"\")\n",
        "user_999_personal_details"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 168,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "请问你的名字是？\n"
          ]
        }
      ],
      "source": [
        "# Ask based on user 999's own missing fields rather than the `ask_for` list\n",
        "# left over from the user_007 walkthrough.\n",
        "ask_for_999 = check_what_is_empty(user_999_personal_details)\n",
        "decide_ask(ask_for_999)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 169,
      "metadata": {},
      "outputs": [],
      "source": [
        "str999 = \"我的名字是999,我住在北京\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 170,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Field 'email' 为空\n",
            "请问您的电子邮件地址是多少？\n"
          ]
        }
      ],
      "source": [
        "user_999_personal_details, ask_for_999 = filter_response(str999,user_999_personal_details)\n",
        "decide_ask(ask_for_999)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 173,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "全部填写完整\n"
          ]
        }
      ],
      "source": [
        "str999 = \"XX@XX.com\"\n",
        "user_999_personal_details, ask_for_999 = filter_response(str999,user_999_personal_details)\n",
        "decide_ask(ask_for_999)\n"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.7"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
